from arter.items import ArterItem
import scrapy
import re


class ArtSpider(scrapy.Spider):
    """Crawl www.art114.cn: home page -> gallery index ("hualang") ->
    per-province pages, yielding one ArterItem per gallery link found.

    Each item carries:
      srname  -- the gallery's Chinese name
      fileurl -- the gallery's home-page URL
    """

    name = 'art'
    allowed_domains = ['www.art114.cn']
    start_urls = ['http://www.art114.cn/']
    # Set by parse_hualang to the gallery-index URL (kept for compatibility).
    url_province = None
    # NOTE(review): unused in this file — kept in case external code reads it.
    names = []

    # Matches a gallery home URL immediately followed by its Chinese name,
    # e.g. 'http://www.xxx.yyy.cn/">画廊名'.  Compiled once, as a raw string,
    # so '\w' / '\.' are regex escapes rather than string escapes.
    _GALLERY_RE = re.compile(r'http://\w+\.\w+\.\w+/">[\u4e00-\u9fa5]+')

    def parse(self, response):
        """Entry point: from the site home page, request the gallery index."""
        yield scrapy.Request(
            self.start_urls[0] + 'hualang/index.html',
            callback=self.parse_hualang,
        )

    def parse_hualang(self, response):
        """Gallery index page: follow the link for each province."""
        # Remember the index URL; province pages are derived from it.
        self.url_province = response.request.url
        rows = response.xpath('/html/body/div[3]/center/div/center/table/tr')
        for tr in rows:
            for td in tr.xpath('.//td'):
                province = td.xpath('.//div[1]//a//text()').extract_first()
                province_href = td.xpath('.//div[1]//a//@href').extract_first()
                if province_href is None:
                    # Cell without a province link — skip it.
                    continue
                # str.replace, not re.sub: the old pattern 'index.html'
                # contained regex metacharacters ('.') and re.sub would also
                # interpret backslashes in the replacement href.
                province_url = self.url_province.replace('index.html', province_href)
                yield scrapy.Request(
                    province_url,
                    callback=self.parse_title,
                    meta={'province': province},
                )

    def parse_title(self, response):
        """Province page: scrape every gallery (URL + Chinese name)."""
        for match in self._GALLERY_RE.findall(response.text):
            # match looks like 'http://.../">NAME'; split once on the marker.
            fileurl, _, srname = match.partition('">')
            # Create a fresh item per match: reusing one mutable item across
            # yields makes every emitted item alias the same object, so later
            # matches clobber earlier ones in asynchronous pipelines.
            item = ArterItem()
            item['srname'] = srname
            item['fileurl'] = fileurl
            yield item